#!/usr/bin/env python3
"""
make_kernel_from_eigs.py

Generate a scalar kernel.npy from the eigenvalue diagnostics in
``kernel_eigs.csv``.  Many of the Volume 4 pipelines require a
one‑dimensional array of per‑link kernel values (ρ) as their primary
input.  This helper reads the shared ``kernel_eigs.csv`` file from
``data/`` and tiles or truncates the ``rho`` column (or, if there is
no ``rho`` column, the first numeric column) so that the final array
has the 2*L*L entries required for the lattice specified in
``config.yaml``.  The resulting NumPy array is written to
``data/kernel.npy``.

This script is intentionally simple and has no external dependencies
beyond NumPy, pandas and PyYAML.  It prints the shape and dtype of the
generated kernel to aid in verification.
"""

import os
import yaml
import numpy as np
import pandas as pd


def _read_lattice_size(cfg: object) -> int:
    """Return the positive lattice size declared in a parsed config.

    ``lattice_size`` is looked up at the top level first and then under
    a ``parameters`` mapping.  A key whose value is ``None`` (an empty
    YAML entry) is treated as absent.

    Raises:
        KeyError: if neither location provides a ``lattice_size``.
        ValueError: if the value is smaller than 1.
    """
    sections = []
    if isinstance(cfg, dict):
        sections.append(cfg)
        # An empty ``parameters:`` entry parses to None, so only descend
        # into it when it really is a mapping.
        nested = cfg.get('parameters')
        if isinstance(nested, dict):
            sections.append(nested)

    for section in sections:
        raw = section.get('lattice_size')
        if raw is not None:
            break
    else:
        raise KeyError('Could not determine lattice_size from config.yaml')

    size = int(raw)
    # A zero size would silently produce an empty kernel and a negative
    # one would be hidden by the squaring below, so reject both here.
    if size < 1:
        raise ValueError(
            f'lattice_size must be a positive integer, got {raw!r}'
        )
    return size


def _select_rho(df: pd.DataFrame) -> np.ndarray:
    """Return the kernel values from the eigenvalue table as floats.

    The ``rho`` column is used when present; otherwise the first column
    with a numeric dtype is taken.

    Raises:
        KeyError: if there is no ``rho`` column and no numeric column.
        ValueError: if the selected column has no rows.
    """
    if 'rho' in df.columns:
        column = 'rho'
    else:
        numeric_cols = [
            col for col in df.columns
            if pd.api.types.is_numeric_dtype(df[col])
        ]
        if not numeric_cols:
            raise KeyError('No numeric column found in kernel_eigs.csv')
        column = numeric_cols[0]

    values = df[column].to_numpy(dtype=float)
    if values.size == 0:
        raise ValueError('rho column in kernel_eigs.csv is empty')
    return values


def _tile_to_length(values: np.ndarray, length: int) -> np.ndarray:
    """Repeat ``values`` periodically and trim to exactly ``length`` items."""
    # Integer ceiling division: the number of whole copies needed to
    # cover ``length`` entries.
    reps = -(-length // values.size)
    return np.tile(values, reps)[:length]


def main() -> None:
    """Build ``data/kernel.npy`` from ``data/kernel_eigs.csv``.

    Reads ``lattice_size`` (L) from ``config.yaml`` in the directory
    above this script, tiles or truncates the eigenvalue column to
    2*L*L entries, saves the result with ``np.save`` and prints its
    shape, dtype and output path.

    Raises:
        KeyError: if ``lattice_size`` or a usable numeric column is missing.
        ValueError: if ``lattice_size`` is not positive or the selected
            column is empty.
    """
    # Determine paths relative to this script
    repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    config_path = os.path.join(repo_root, 'config.yaml')
    data_dir = os.path.join(repo_root, 'data')
    eigs_path = os.path.join(data_dir, 'kernel_eigs.csv')
    out_path = os.path.join(data_dir, 'kernel.npy')

    # Load configuration to infer lattice size.  The encoding is pinned
    # so parsing does not depend on the platform's default locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        cfg = yaml.safe_load(f)
    lattice_size = _read_lattice_size(cfg)

    # Number of oriented links on a 2D L×L lattice: 2*L^2
    num_links = 2 * lattice_size * lattice_size

    rho_vals = _select_rho(pd.read_csv(eigs_path))

    # If the diagnostic contains fewer values than required it is
    # repeated periodically; if it contains more, the extras are dropped.
    kernel = _tile_to_length(rho_vals, num_links)

    # Save as float64 to ensure downstream compatibility.  The array is
    # already float, so copy=False avoids a redundant copy.
    kernel = kernel.astype(float, copy=False)
    np.save(out_path, kernel)
    print(f'Generated kernel.npy with shape {kernel.shape} and dtype {kernel.dtype} at {out_path}')


# Run the generator only when this file is executed as a script, so that
# importing the module does not read or write any files.
if __name__ == '__main__':
    main()